**********************************************************************
** title:	The rural-urban cleavage in US presidential elections	**
** data:	ANES and CES files										**
** purpose:	code datasets and merge 								**
** authors:	pautonnier et al.										**
**********************************************************************

/*

** TABLE OF CONTENTS

** 1. Preamble 
** 2. Code CES data 
** 3. Load and code cumulative ANES dataset 
** 4. Append CES information 
** 5. Recode race variables 
** 6. Recode geographic areas CES 
** 7. Save final dataset 

*/

* -------------
* 1. preamble
* -------------

	** set working directory (all dataset paths below are relative to this folder)
	cd "~/Dropbox/JELS_Rural_Urban_USA"

	** install software

	/* go to https://www.romain-lachat.ch/software.html, download the cindex.ado file
	(written by R. Lachat) and save it in your personal Stata folder. */

	/* install the clarify package in Stata (written by Tomz et al.) */

	** install standard packages (uncomment if not installed yet)
	*ssc install outreg2
	* grstyle is used right below; it depends on palettes and colrspace
	*ssc install grstyle
	*ssc install palettes
	*ssc install colrspace

	** set scheme for graphs
	grstyle init
	grstyle set plain, nogrid noextend

	** raise the maximum number of variables allowed in a dataset
	* -set maxvar- is refused while data are in memory, so empty the dataset first;
	* this keeps the do-file re-runnable within the same Stata session
	clear
	set maxvar 10000

* -------------------
* 2. code CES data
* -------------------

	/* note that the Cooperative Election Study (CES) datasets are saved under the name CCES, 
	referring to the previously used acronym for this project */
	
	
	** 2008 CES **

	* load data and attach the RUCA codes by zip code; rows that exist only in the
	* RUCA file are not kept
	* NOTE(review): string() drops leading zeros (e.g. 01002 becomes "1002"); confirm that
	* zip_code in the RUCA file is stored the same way, otherwise those zip codes cannot match
	use "Datasets/Stata/cces/cces2008_common.dta", clear
	gen zip_code = string(V202)
	merge m:1 zip_code using "Datasets/Stata/cces/ruca2010-zipcode.dta", keep(master match) nogenerate

	* rural dummy from the primary RUCA code
	* 0 = metropolitan area (1 core, 2 high commuting, 3 low commuting)
	* 1 = micropolitan area (4-6), small town (7-9) or rural area (10)
	gen rural = 0 if inlist(ruca1, 1, 2, 3)
	replace rural = 1 if inlist(ruca1, 4, 5, 6, 7, 8, 9, 10)

	* two-party presidential vote (CC327: 1 = John McCain, 2 = Barack Obama);
	* any other answer stays missing
	gen votedem = (CC327 == 2) if inlist(CC327, 1, 2)
	gen voterep = (CC327 == 1) if inlist(CC327, 1, 2)

	* marital status (everything other than V214 == 1, including missing, is coded 0)
	gen married = (V214 == 1)

	* children (everything other than V242 == 1, including missing, is coded 0)
	gen children = (V242 == 1)

	* migration
	* migration_background now also covers immigrants themselves (CC332 == 1 or 2), which is
	* how the 2012, 2016, 2020 and ANES sections code it; previously 2008 left them at 0
	gen immigrant = inlist(CC332, 1, 2)
	gen migration_background = inlist(CC332, 1, 2, 3, 4)

	* work status
	gen work_status_cond = 1 if inlist(V209, 1, 2, 3)			// Employed
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 2 if V209 == 4					// Unemployed
	replace work_status_cond = 3 if inlist(V209, 5, 6)			// Retired/Disabled
	replace work_status_cond = 4 if V209 == 7					// Homemaker
	replace work_status_cond = 5 if V209 == 8					// Student
	replace work_status_cond = 6 if V209 == 9					// Other

	* the label was defined but never attached in this section; attach it as the 2020
	* and ANES sections do
	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired/Disabled" ///
                                 4 "Homemaker" 5 "Student" 6 "Other"
	label values work_status_cond work_status_cond_lbl

	* age group: 1 = 17-24, then ten-year bands 25-34 ... 65-74, 7 = 75 and older
	gen age = 2008 - V207
	gen agegroup = 1 if inrange(age, 17, 24)
	forvalues g = 2/6 {
		replace agegroup = `g' if inrange(age, 10*`g' + 5, 10*`g' + 14)
	}
	replace agegroup = 7 if age >= 75 & age != .

	* gender (V208: 1 = coded 0, 2 = coded 1)
	gen female = (V208 == 2) if inlist(V208, 1, 2)

	* education (note that these are not the same categories as in ANES)
	clonevar education = V213

	* race: one dummy per observed category, plus a white / non-white indicator
	clonevar race = V211
	tab race, gen(racegroup)
	gen white = (V211 == 1) if V211 != .

	* census region (1 Northeast, 2 Midwest, 3 South, 4 West)
	gen census = V252 if inlist(V252, 1, 2, 3, 4)

	* household income (roughly 5 equally sized groups to match quintile coding in ANES)
	gen income = 1 if inrange(V246, 1, 5)
	replace income = 2 if inrange(V246, 6, 7)
	replace income = 3 if inrange(V246, 8, 9)
	replace income = 4 if inrange(V246, 10, 12)
	replace income = 5 if inrange(V246, 13, 15)

	* member of labor union in the household (CC329: 1 is coded 0, 2-4 are coded 1)
	gen laborunion = inlist(CC329, 2, 3, 4) if inlist(CC329, 1, 2, 3, 4)

	* religion in 4 major groups
	gen religion = 1 if V219 == 1									// Protestant
	replace religion = 2 if V219 == 2								// Catholic
	replace religion = 3 if V219 == 5								// Jewish
	replace religion = 4 if inlist(V219, 3, 4) | inrange(V219, 6, 12)	// Other

	* liberal-conservative self-placement, from 1 = very liberal to 5 = very conservative
	gen ideology = V243 if V243 != 6		// 6 = not sure --> missing

	* year
	gen year = 2008

	* save dataset
	save "Datasets/Stata/cces/cces2008_coded.dta", replace


	** 2012 CES **

	* load data and attach the RUCA codes by zip code; rows that exist only in the
	* RUCA file are not kept
	* NOTE(review): string() drops leading zeros (e.g. 01002 becomes "1002"); confirm that
	* zip_code in the RUCA file is stored the same way, otherwise those zip codes cannot match
	use "Datasets/Stata/cces/cces2012_common.dta", clear
	gen zip_code = string(inputzip)
	merge m:1 zip_code using "Datasets/Stata/cces/ruca2010-zipcode.dta", keep(master match) nogenerate

	* rural dummy from the primary RUCA code
	* 0 = metropolitan area (1 core, 2 high commuting, 3 low commuting)
	* 1 = micropolitan area (4-6), small town (7-9) or rural area (10)
	gen rural = 0 if inlist(ruca1, 1, 2, 3)
	replace rural = 1 if inlist(ruca1, 4, 5, 6, 7, 8, 9, 10)

	* two-party presidential vote (CC410a: 1 = Barack Obama, 2 = Mitt Romney);
	* any other answer stays missing
	gen votedem = (CC410a == 1) if inlist(CC410a, 1, 2)
	gen voterep = (CC410a == 2) if inlist(CC410a, 1, 2)

	* age group: 1 = 17-24, then ten-year bands 25-34 ... 65-74, 7 = 75 and older
	gen age = 2012 - birthyr
	gen agegroup = 1 if inrange(age, 17, 24)
	forvalues g = 2/6 {
		replace agegroup = `g' if inrange(age, 10*`g' + 5, 10*`g' + 14)
	}
	replace agegroup = 7 if age >= 75 & age != .

	* sex (gender: 1 = coded 0, 2 = coded 1)
	gen female = (gender == 2) if inlist(gender, 1, 2)

	* education (note that this variable does not include the same categories as in ANES)
	clonevar education = educ

	* race: one dummy per observed category, plus a white / non-white indicator
	tab race, gen(racegroup)
	gen white = (race == 1) if race != .

	* work status
	gen work_status_cond = 1 if inlist(employ, 1, 2, 3)			// Employed
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 2 if employ == 4					// Unemployed
	replace work_status_cond = 3 if inlist(employ, 5, 6)		// Retired/Disabled
	replace work_status_cond = 4 if employ == 7					// Homemaker
	replace work_status_cond = 5 if employ == 8					// Student
	replace work_status_cond = 6 if employ == 9					// Other

	* the label was defined but never attached in this section; attach it as the 2020
	* and ANES sections do
	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired/Disabled" ///
                                 4 "Homemaker" 5 "Student" 6 "Other"
	label values work_status_cond work_status_cond_lbl

	* marital status (everything other than marstat == 1, including missing, is coded 0)
	gen married = (marstat == 1)

	* children (everything other than child18 == 1, including missing, is coded 0)
	gen children = (child18 == 1)

	* migration: immigrants (immstat 1-2) also count as having a migration background
	gen immigrant = inlist(immstat, 1, 2)
	gen migration_background = inlist(immstat, 1, 2, 3, 4)

	* census region from the state postal abbreviation
	* region1 = Northeast, region2 = Midwest, region3 = South, region4 = West
	* DC is added to the South (US Census Bureau definition); it was absent from the
	* mapping before, which left census missing for DC respondents
	local region1 ME VT NH MA CT RI NY PA NJ
	local region2 ND SD NE KS MN IA MO WI IL MI IN OH
	local region3 OK TX AR LA KY TN MS AL WV VA MD DE DC NC SC GA FL
	local region4 WA OR CA MT ID WY NV UT CO AZ NM AK HI
	gen census = .
	forvalues r = 1/4 {
		foreach st of local region`r' {
			replace census = `r' if state == "`st'"
		}
	}

	* household income (roughly 5 equally sized groups to match quintile coding in ANES)
	gen income = 1 if inrange(faminc, 1, 3)
	replace income = 2 if inrange(faminc, 4, 5)
	replace income = 3 if inrange(faminc, 6, 7)
	replace income = 4 if inrange(faminc, 8, 11)
	replace income = 5 if inrange(faminc, 12, 32)

	* member of labor union in the household
	* 1 if the respondent (union) or a household member (unionhh) is a union member;
	* 0 only when both items are answered and neither equals 1
	gen laborunion = 1 if union == 1 | unionhh == 1
	replace laborunion = 0 if union != 1 & unionhh != 1 & union != . & unionhh != .

	* religion in 4 major groups
	gen religion = 1 if religpew == 1										// Protestant
	replace religion = 2 if religpew == 2									// Catholic
	replace religion = 3 if religpew == 5									// Jewish
	replace religion = 4 if inlist(religpew, 3, 4) | inrange(religpew, 6, 12)	// Other

	* ideology (liberal conservative, 7 pt scale); code 8 is set to missing
	gen ideology = CC334A if CC334A != 8

	* year
	gen year = 2012

	* save dataset
	save "Datasets/Stata/cces/cces2012_coded.dta", replace


	** 2016 CES **

	* load data and attach the RUCA codes by zip code; rows that exist only in the
	* RUCA file are not kept
	* NOTE(review): string() drops leading zeros (e.g. 01002 becomes "1002"); confirm that
	* zip_code in the RUCA file is stored the same way, otherwise those zip codes cannot match
	use "Datasets/Stata/cces/cces2016_common.dta", clear
	gen zip_code = string(inputzip)
	merge m:1 zip_code using "Datasets/Stata/cces/ruca2010-zipcode.dta", keep(master match) nogenerate

	* rural dummy from the primary RUCA code
	* 0 = metropolitan area (1 core, 2 high commuting, 3 low commuting)
	* 1 = micropolitan area (4-6), small town (7-9) or rural area (10)
	gen rural = 0 if inlist(ruca1, 1, 2, 3)
	replace rural = 1 if inlist(ruca1, 4, 5, 6, 7, 8, 9, 10)

	* two-party presidential vote (CC16_410a: 1 = Donald Trump, 2 = Hillary Clinton);
	* any other answer stays missing
	gen votedem = (CC16_410a == 2) if inlist(CC16_410a, 1, 2)
	gen voterep = (CC16_410a == 1) if inlist(CC16_410a, 1, 2)

	* age group: 1 = 17-24, then ten-year bands 25-34 ... 65-74, 7 = 75 and older
	gen age = 2016 - birthyr
	gen agegroup = 1 if inrange(age, 17, 24)
	forvalues g = 2/6 {
		replace agegroup = `g' if inrange(age, 10*`g' + 5, 10*`g' + 14)
	}
	replace agegroup = 7 if age >= 75 & age != .

	* sex (gender: 1 = coded 0, 2 = coded 1)
	gen female = (gender == 2) if inlist(gender, 1, 2)

	* education (note that this variable does not have the same categories as ANES)
	clonevar education = educ

	* race: one dummy per observed category, plus a white / non-white indicator
	tab race, gen(racegroup)
	gen white = (race == 1) if race != .

	* marital status (everything other than marstat == 1, including missing, is coded 0)
	gen married = (marstat == 1)

	* children (everything other than child18 == 1, including missing, is coded 0)
	gen children = (child18 == 1)

	* work status
	gen work_status_cond = 1 if inlist(employ, 1, 2, 3)			// Employed
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 2 if employ == 4					// Unemployed
	replace work_status_cond = 3 if inlist(employ, 5, 6)		// Retired/Disabled
	replace work_status_cond = 4 if employ == 7					// Homemaker
	replace work_status_cond = 5 if employ == 8					// Student
	replace work_status_cond = 6 if employ == 9					// Other

	* the label was defined but never attached in this section; attach it as the 2020
	* and ANES sections do
	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired/Disabled" ///
                                 4 "Homemaker" 5 "Student" 6 "Other"
	label values work_status_cond work_status_cond_lbl

	* migration: immigrants (immstat 1-2) also count as having a migration background
	gen immigrant = inlist(immstat, 1, 2)
	gen migration_background = inlist(immstat, 1, 2, 3, 4)

	* census region from the state postal abbreviation
	* region1 = Northeast, region2 = Midwest, region3 = South, region4 = West
	* DC is added to the South (US Census Bureau definition); it was absent from the
	* mapping before, which left census missing for DC respondents
	local region1 ME VT NH MA CT RI NY PA NJ
	local region2 ND SD NE KS MN IA MO WI IL MI IN OH
	local region3 OK TX AR LA KY TN MS AL WV VA MD DE DC NC SC GA FL
	local region4 WA OR CA MT ID WY NV UT CO AZ NM AK HI
	gen census = .
	forvalues r = 1/4 {
		foreach st of local region`r' {
			replace census = `r' if state == "`st'"
		}
	}

	* household income (roughly 5 equally sized groups to match quintile coding in ANES)
	gen income = 1 if inrange(faminc, 1, 3)
	replace income = 2 if inrange(faminc, 4, 5)
	replace income = 3 if inrange(faminc, 6, 7)
	replace income = 4 if inrange(faminc, 8, 11)
	replace income = 5 if inrange(faminc, 12, 32)

	* member of labor union in the household
	* 1 if the respondent (union) or a household member (unionhh) is a union member;
	* 0 only when both items are answered and neither equals 1
	gen laborunion = 1 if union == 1 | unionhh == 1
	replace laborunion = 0 if union != 1 & unionhh != 1 & union != . & unionhh != .

	* religion in 4 major groups
	gen religion = 1 if religpew == 1										// Protestant
	replace religion = 2 if religpew == 2									// Catholic
	replace religion = 3 if religpew == 5									// Jewish
	replace religion = 4 if inlist(religpew, 3, 4) | inrange(religpew, 6, 12)	// Other

	* ideology (liberal conservative, 7 pt scale); code 8 is set to missing
	gen ideology = CC16_340a if CC16_340a != 8

	* year
	gen year = 2016

	* save dataset
	save "Datasets/Stata/cces/cces2016_coded.dta", replace


	** 2020 CES **

	* load data and attach the RUCA codes by zip code; rows that exist only in the
	* RUCA file are not kept
	* NOTE(review): string() drops leading zeros (e.g. 01002 becomes "1002"); confirm that
	* zip_code in the RUCA file is stored the same way, otherwise those zip codes cannot match
	use "Datasets/Stata/cces/cces2020_common.dta", clear
	gen zip_code = string(inputzip)
	merge m:1 zip_code using "Datasets/Stata/cces/ruca2010-zipcode.dta", keep(master match) nogenerate

	* rural dummy from the primary RUCA code
	* 0 = metropolitan area (1 core, 2 high commuting, 3 low commuting)
	* 1 = micropolitan area (4-6), small town (7-9) or rural area (10)
	gen rural = 0 if inlist(ruca1, 1, 2, 3)
	replace rural = 1 if inlist(ruca1, 4, 5, 6, 7, 8, 9, 10)

	* two-party presidential vote (CC20_410: 1 = Joe Biden, 2 = Donald J. Trump);
	* any other answer stays missing
	gen votedem = (CC20_410 == 1) if inlist(CC20_410, 1, 2)
	gen voterep = (CC20_410 == 2) if inlist(CC20_410, 1, 2)

	* age group: 1 = 17-24, then ten-year bands 25-34 ... 65-74, 7 = 75 and older
	gen age = 2020 - birthyr
	gen agegroup = 1 if inrange(age, 17, 24)
	forvalues g = 2/6 {
		replace agegroup = `g' if inrange(age, 10*`g' + 5, 10*`g' + 14)
	}
	replace agegroup = 7 if age >= 75 & age != .

	* sex (gender: 1 = coded 0, 2 = coded 1)
	gen female = (gender == 2) if inlist(gender, 1, 2)

	* education (note that this variable does not include the same categories as ANES)
	clonevar education = educ

	* race: one dummy per observed category, plus a white / non-white indicator
	tab race, gen(racegroup)
	gen white = (race == 1) if race != .

	* work status
	gen work_status_cond = 1 if inlist(employ, 1, 2, 3)			// Employed
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 2 if employ == 4					// Unemployed
	replace work_status_cond = 3 if inlist(employ, 5, 6)		// Retired/Disabled
	replace work_status_cond = 4 if employ == 7					// Homemaker
	replace work_status_cond = 5 if employ == 8					// Student
	replace work_status_cond = 6 if employ == 9					// Other

	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired/Disabled" ///
                                 4 "Homemaker" 5 "Student" 6 "Other"
	label values work_status_cond work_status_cond_lbl

	* census region from the state postal abbreviation
	* region1 = Northeast, region2 = Midwest, region3 = South, region4 = West
	* DC is added to the South (US Census Bureau definition); it was absent from the
	* mapping before, which left census missing for DC respondents
	local region1 ME VT NH MA CT RI NY PA NJ
	local region2 ND SD NE KS MN IA MO WI IL MI IN OH
	local region3 OK TX AR LA KY TN MS AL WV VA MD DE DC NC SC GA FL
	local region4 WA OR CA MT ID WY NV UT CO AZ NM AK HI
	gen census = .
	forvalues r = 1/4 {
		foreach st of local region`r' {
			replace census = `r' if state == "`st'"
		}
	}

	* household income (roughly 5 equally sized groups to match quintile coding in ANES)
	* note: the cut points differ from the 2012 and 2016 sections
	gen income = 1 if inrange(faminc, 1, 3)
	replace income = 2 if inrange(faminc, 4, 5)
	replace income = 3 if inrange(faminc, 6, 8)
	replace income = 4 if inrange(faminc, 9, 11)
	replace income = 5 if inrange(faminc, 12, 16)

	* member of labor union in the household
	* 1 if the respondent (union) or a household member (unionhh) is a union member;
	* 0 only when both items are answered and neither equals 1
	gen laborunion = 1 if union == 1 | unionhh == 1
	replace laborunion = 0 if union != 1 & unionhh != 1 & union != . & unionhh != .

	* marital status (everything other than marstat == 1, including missing, is coded 0)
	gen married = (marstat == 1)

	* children (everything other than child18 == 1, including missing, is coded 0)
	gen children = (child18 == 1)

	* migration: immigrants (immstat 1-2) also count as having a migration background
	gen immigrant = inlist(immstat, 1, 2)
	gen migration_background = inlist(immstat, 1, 2, 3, 4)

	* religion in 4 major groups
	gen religion = 1 if religpew == 1										// Protestant
	replace religion = 2 if religpew == 2									// Catholic
	replace religion = 3 if religpew == 5									// Jewish
	replace religion = 4 if inlist(religpew, 3, 4) | inrange(religpew, 6, 12)	// Other

	* ideology (liberal conservative, 7 pt scale); code 8 is set to missing
	gen ideology = CC20_340a if CC20_340a != 8

	* year
	gen year = 2020

	* save dataset
	save "Datasets/Stata/cces/cces2020_coded.dta", replace

* -----------------------------------------------
* 3. load and code the cumulative ANES dataset 
* -----------------------------------------------

	* load the data
	use "Datasets/Stata/ANES/anes_timeseries_cdf_stata_20220916.dta", clear

	* rural respondent
	* VCF0111: 1 = central cities, 2 = suburban areas,
	* 3 = rural, small towns, outlying and adj...
	gen urbanism = VCF0111 if VCF0111 != 0		// 0 = RDD samples --> missing info
	gen rural = (VCF0111 == 3) if inlist(VCF0111, 1, 2, 3)
	gen suburban = (VCF0111 == 2)				// 0 for every other value, including missing

	* two-party presidential vote (VCF0704 == 1 counts as Democrat, == 2 as Republican)
	gen votedem = (VCF0704 == 1) if inlist(VCF0704, 1, 2)
	gen voterep = (VCF0704 == 2) if inlist(VCF0704, 1, 2)

	* votechoice (choosing democrat vs rep & others)
	gen votechoice = (VCF0704 == 1) if inlist(VCF0704, 1, 2, 3)

	* age group
	clonevar agegroup = VCF0102
	replace agegroup = . if agegroup == 0		// missing

	* female
	* 'other' (value = 3) only included in 2016, N = 11; it stays missing here
	gen female = (VCF0104 == 2) if inlist(VCF0104, 1, 2)

	* education
	clonevar education = VCF0140
	replace education = . if inlist(education, 0, 8, 9)		// missing

	* race: one dummy per observed category, plus a white / non-white indicator
	clonevar race = VCF0105b
	replace race = . if inlist(race, 0, 9)
	tab race, gen(racegroup)
	gen white = (VCF0105b == 1) if inlist(VCF0105b, 1, 2, 3, 4)

	* children
	* a bare "VCF0139 > 0" is also true for missing values (Stata sorts missing above every
	* number), which used to flag respondents without an answer as having children
	* NOTE(review): if VCF0139 carries DK/NA codes such as 8 or 9, they still count as
	* children here; check the codebook
	gen children = (VCF0139 > 0 & !missing(VCF0139))

	* migration: immigrants also count as having a migration background
	gen immigrant = (VCF0142 >= 200 & VCF0142 < 998)
	gen migration_background = (VCF0143 == 5 | immigrant == 1)

	* work status
	gen work_status_cond = 1 if inlist(VCF0116, 1, 2)			// Employed (Working now + Temp laid off)
	label variable work_status_cond "Employment status (condensed)"
	replace work_status_cond = 2 if VCF0116 == 4				// Unemployed
	replace work_status_cond = 3 if inlist(VCF0116, 5, 6)		// Retired/Disabled
	replace work_status_cond = 4 if VCF0116 == 7				// Homemaker
	replace work_status_cond = 5 if VCF0116 == 8				// Student
	replace work_status_cond = 6 if VCF0116 == 9				// NA; DK

	label define work_status_cond_lbl 1 "Employed" 2 "Unemployed" 3 "Retired/Disabled" ///
                                 4 "Homemaker" 5 "Student" 6 "NA; DK"
	label values work_status_cond work_status_cond_lbl

	* state
	rename VCF0901b state

	* census region (1 Northeast, 2 Midwest, 3 South, 4 West)
	gen census = VCF0112 if inlist(VCF0112, 1, 2, 3, 4)

	* household income
	/* Note on coding (percentiles): 1 = 0 to 16; 2 = 17 to 33; 3 = 34 to 67;
	4 = 68 to 95; 5 = 96 to 100. Any other code (e.g. 0) stays missing. */
	gen income = VCF0114 if inlist(VCF0114, 1, 2, 3, 4, 5)

	* member of labor union in the household (VCF0127: 1 is coded 1, 2 is coded 0)
	gen laborunion = (VCF0127 == 1) if inlist(VCF0127, 1, 2)

	* religion in 4 major groups:
	* 1 = Protestant, 2 = Catholic, 3 = Jewish, 4 = Other (includes none)
	* the "Other" line used to test VCF0128 == 3 a second time, which moved Jewish
	* respondents into "Other" and left VCF0128 == 4 missing; codes 1-4 are now kept as is
	gen religion = VCF0128 if inlist(VCF0128, 1, 2, 3, 4)

	* marital status (any value other than the tested one, including missing, is coded 0)
	gen married = (VCF0147 == 1)
	gen nevermarried = (VCF0147 == 2)

	* importance of religion (1= important, 0: not important)
	gen religiosity = (VCF0846 == 1) if inlist(VCF0846, 1, 2)

	* drop years that are coded in a separate file (in cumulative file rural/urban is only coded until 2000)
	drop if inlist(VCF0004, 2004, 2008, 2012, 2016, 2020)

	* rename election year variable
	rename VCF0004 year
	
	
* ----------------------------------------------
* 4. append CES information
* ----------------------------------------------

	* stack the four coded CES waves underneath the ANES data currently in memory
	foreach wave in 2008 2012 2016 2020 {
		append using "Datasets/Stata/cces/cces`wave'_coded.dta"
	}

	* one weight variable for the CES years; the source variable differs by wave
	* (V201 in 2008, V103 in 2012, commonweight in 2016 and 2020); all other rows stay missing
	gen weight = cond(year == 2008, V201, ///
		cond(year == 2012, V103, ///
		cond(inlist(year, 2016, 2020), commonweight, .)))

	* data source of each row, by year range (empty string outside both ranges)
	gen dataset = cond(inrange(year, 1952, 2000), "ANES", ///
		cond(inrange(year, 2008, 2020), "CES", ""))

	* copy of the year variable
	gen eyear = year
	
* ----------------------------------------------------------------
* 5. recode race, religion and education variables for analyses
* ----------------------------------------------------------------

	* new race variable, built from the racegroup dummies created by -tab race, gen()-
	* in every source file; dummies 4 to 8 are pooled into one residual category
	* NOTE(review): -tab, gen()- numbers the dummies by rank of the observed values, so this
	* presumes that no race code was unobserved in any source file -- confirm
	gen race_recoded = .
	forvalues k = 1/3 {
		replace race_recoded = `k' if racegroup`k' == 1
	}
	foreach dummy of varlist racegroup4 racegroup5 racegroup6 racegroup7 racegroup8 {
		replace race_recoded = 4 if `dummy' == 1
	}

	label define racelab 1 "White non-Hispanic" 2 "Black non-Hispanic" 3 "Hispanic" 4 "Other or multiple races" 
	label values race_recoded racelab

	* new religion variable: religion codes 1 and 2 are kept, codes 3 and 4 are pooled into 3
	gen religion_recoded = cond(inrange(religion, 3, 4), 3, religion) ///
		if inlist(religion, 1, 2) | inrange(religion, 3, 4)

	* new education variable (higher educated vs. not): 1 for education 3-6, 0 for education 1-2
	* NOTE(review): education values above 6 stay missing here; confirm that this is intended
	* for the ANES rows, whose education variable (VCF0140) only has 0, 8 and 9 set to missing
	gen higher_educated = inrange(education, 3, 6) ///
		if inrange(education, 1, 2) | inrange(education, 3, 6)
	
	
* ----------------------------------------------
* 6. recode areas CES
* ----------------------------------------------	

	* five-category area type from the primary RUCA code (ruca1 comes from the RUCA merge
	* in the CES sections)
	gen area_type = 1 if ruca1 == 1						// Metropolitan area core = large cities
	replace area_type = 2 if inlist(ruca1, 2, 3)		// Metropolitan area high/low commuting = suburban
	replace area_type = 3 if inrange(ruca1, 4, 6)		// Micropolitan areas = small cities
	replace area_type = 4 if inrange(ruca1, 7, 9)		// Small town core and commuting areas = rural towns
	replace area_type = 5 if ruca1 == 10				// Rural areas = isolated rural areas

	* value labels for the five categories
	label define area_type_lbl 1 "Large cities" 2 "Suburban areas" 3 "Small cities" 4 "Rural towns" 5 "Isolated rural areas"
	label values area_type area_type_lbl

	* three-category version spanning both sources: VCF0111 is used for the years
	* 1952-2000 and ruca1 for the years 2008-2020
	local early inrange(year, 1952, 2000)
	local late inrange(year, 2008, 2020)
	gen area_type_simple = .
	replace area_type_simple = 1 if (`early' & VCF0111 == 1) | (`late' & ruca1 == 1)				// Urban
	replace area_type_simple = 2 if (`early' & VCF0111 == 2) | (`late' & inlist(ruca1, 2, 3))		// Suburban
	replace area_type_simple = 3 if (`early' & VCF0111 == 3) | (`late' & inrange(ruca1, 4, 10))	// Rural

	* value labels for the three categories
	label define area_simple_lbl 1 "Urban" 2 "Suburban" 3 "Rural"
	label values area_type_simple area_simple_lbl

* ----------------------------------------------
* 7. save the fully coded dataset
* ----------------------------------------------

	* write the combined ANES + CES data to disk; -replace- overwrites an existing file
	save "Datasets/Stata/ANES_CCES_coded/ANESCCES-coded.dta", replace









